Importing necessary packages



In [3]:

    
import pandas as pd
import numpy as np

from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer

CountVectorizer turns each sentence into a vector of word counts (a bag-of-words representation)



In [4]:

    
# Toy corpus: three short sentences, each treated as one document by the
# vectorizers in the cells that follow.
corpus = [
    'This is first sentence',
    'Here is the second sentence',
    'Third sentence',
]



In [5]:

    
# Bag-of-words model with default settings. fit_transform learns the
# vocabulary from the corpus and returns the document-term count matrix
# in a single call.
count_vec = CountVectorizer()
features = count_vec.fit_transform(raw_documents=corpus)



In [6]:

    
# Display the count matrix as a table: one row per sentence in the corpus,
# one column per vocabulary word.
# get_feature_names() was removed in scikit-learn 1.2; get_feature_names_out()
# is its replacement. toarray() gives a plain ndarray rather than the
# np.matrix that todense() returns.
pd.DataFrame(features.toarray(), columns=count_vec.get_feature_names_out())

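TfidfVectorizer turns each sentence into a vector of TF-IDF weights (term frequency scaled by inverse document frequency), so words that appear in many sentences are weighted lower than words that are distinctive to one sentence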



In [7]:

    
# TF-IDF model with default settings. fit_transform learns the vocabulary
# and document-frequency statistics from the corpus, then returns the
# weighted document-term matrix.
tfidf = TfidfVectorizer()
features_tfidf = tfidf.fit_transform(raw_documents=corpus)



In [8]:

    
# Display the TF-IDF matrix as a table: one row per sentence in the corpus,
# one column per vocabulary word.
# get_feature_names() was removed in scikit-learn 1.2; get_feature_names_out()
# is its replacement. toarray() gives a plain ndarray rather than the
# np.matrix that todense() returns.
pd.DataFrame(features_tfidf.toarray(), columns=tfidf.get_feature_names_out())

	first	here	is	second	sentence	the	third	this
0	0.584483	0.000000	0.444514	0.000000	0.345205	0.000000	0.000000	0.584483
1	0.000000	0.504611	0.383770	0.504611	0.298032	0.504611	0.000000	0.000000
2	0.000000	0.000000	0.000000	0.000000	0.508542	0.000000	0.861037	0.000000